library(dplyr)
# Load the two input data sets.
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes the caller's objects without resetting loaded packages or options,
# so it does not give a clean session anyway. Run the script in a fresh R
# session for a reproducible run.
dataset_leip <- readRDS("D:/Replication/dataset_leip.rds")

# load() returns the names of the objects it restored; check that the expected
# object `x` really came from the file rather than from a stale workspace.
loaded_names <- load("D:/Replication/dataset_dataverse.RData")
if (!("x" %in% loaded_names)) {
  stop(
    "D:/Replication/dataset_dataverse.RData does not contain an object named 'x'",
    call. = FALSE
  )
}
dataset_dataverse <- x
rm(x, loaded_names)

# Merging both data sets (inner merge) on the `fips` key.
# merge() has no `id` argument: it would be swallowed by `...` and the join
# would silently run on every column name the two inputs share. `by` makes
# the key explicit.
# NOTE(review): any other column present in both inputs is now kept twice with
# .x/.y suffixes instead of being used as a join key -- confirm that none of
# the columns used further down exist in both data sets.
dataset_leip_dataverse <- merge(dataset_dataverse, dataset_leip, by = "fips")

#Creating participation variables

#' Trimmed change of a yearly variable between two years
#'
#' Computes `<column><year> - <column><year - difference>` row by row and
#' replaces every value lying strictly outside the 1%-99% quantile range of
#' that change with NaN.
#'
#' @param dataset Data frame containing the columns `<column><year>` and
#'   `<column><year - difference>`.
#' @param year Year of the later observation.
#' @param difference Number of years to look back from `year`.
#' @param column Column-name prefix, e.g. "turnout".
#' @return A one-column data frame with the same rows as `dataset`; the column
#'   is named `d<column><year><year - difference>`.
year_difference <- function(dataset, year, difference, column) {
  # Dynamic column names
  prev_year <- year - difference
  curr_col <- paste0(column, year)
  prev_col <- paste0(column, prev_year)
  diff_col <- paste0("d", curr_col, prev_year)

  missing_cols <- setdiff(c(curr_col, prev_col), names(dataset))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `dataset`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Both operands are taken from the `dataset` argument, so the function does
  # not depend on any object in the global environment.
  raw_diff <- dataset[[curr_col]] - dataset[[prev_col]]

  # 1% and 99% quantiles of the raw change
  bounds <- quantile(raw_diff, c(0.01, 0.99), na.rm = TRUE)

  # Values outside the quantile range become NaN; missing values stay missing.
  trimmed <- ifelse(
    raw_diff > bounds[[2]] | raw_diff < bounds[[1]],
    NaN,
    raw_diff
  )

  # Reuse a one-column slice of `dataset` so class and row names carry over.
  result <- dataset[curr_col]
  result[[1]] <- trimmed
  names(result) <- diff_col
  result
}

# For every even year from 2004 to 2016: derive turnout and registration as a
# percentage of the `vap<year>` column, then append the trimmed change against
# each earlier year in the sequence (lags of 2, 4, ... years).
years <- seq(2004, 2016, 2)

for (year in years) {
  # Dynamic variable names
  turnout_name <- paste0("turnout", year)
  registration_name <- paste0("registration", year)
  voting_col <- paste0("total_voting_", year)
  registered_col <- paste0("total_registered_", year)
  vap_col <- paste0("vap", year)

  # Fail early with a readable message if an input column is absent.
  absent <- setdiff(
    c(voting_col, registered_col, vap_col),
    names(dataset_leip_dataverse)
  )
  if (length(absent) > 0) {
    stop("Missing input column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  vap <- dataset_leip_dataverse[[vap_col]]

  ### Turnout
  # Percentage of `vap`; values that round to 0 are replaced by NaN.
  pre_turnout <- dataset_leip_dataverse[[voting_col]] * 100 / vap
  dataset_leip_dataverse[[turnout_name]] <-
    ifelse(round(pre_turnout) != 0, pre_turnout, NaN)

  ### Registration
  # Percentage of `vap`; only exact zeros are replaced by NaN.
  # NOTE(review): turnout filters on round(x) != 0 while registration filters
  # on x != 0 -- confirm the asymmetry is intended.
  pre_reg <- dataset_leip_dataverse[[registered_col]] * 100 / vap
  dataset_leip_dataverse[[registration_name]] <-
    ifelse(pre_reg != 0, pre_reg, NaN)

  ### Lagged changes
  # One lag per earlier year in `years`, nearest first (2, 4, ...); empty for
  # the first year. Column order per lag: turnout change, then registration.
  for (lag in rev(year - years[years < year])) {
    for (prefix in c("turnout", "registration")) {
      dataset_leip_dataverse <- bind_cols(
        dataset_leip_dataverse,
        year_difference(dataset_leip_dataverse, year, lag, prefix)
      )
    }
  }
}

# Persist the merged data set; the relative path resolves against the current
# working directory.
saveRDS(
  object = dataset_leip_dataverse,
  file = "dataset_leip_dataverse.rds"
)

# Show the first rows as a quick visual check of the result.
head(x = dataset_leip_dataverse)